package au.com.acpfg.misc.uniprot; import java.util.ArrayList; import java.util.List; import org.knime.core.data.DataCell; import org.knime.core.data.DataColumnSpec; import org.knime.core.data.DataColumnSpecCreator; import org.knime.core.data.DataTableSpec; import org.knime.core.data.DataType; import org.knime.core.data.collection.ListCell; import org.knime.core.data.def.IntCell; import org.knime.core.data.def.StringCell; import org.xml.sax.helpers.XMLReaderFactory; import au.com.acpfg.xml.reader.XMLCell; import com.fatdog.xmlEngine.ResultList; import com.fatdog.xmlEngine.XQEngine; public class UniRefEntryTask extends RetrieveEntryTask { private final int NUM_COLUMNS = 6; public UniRefEntryTask(UniProtAccessorNodeModel m, String db) { super(m, "/uniref/"+db+"_"); } @Override public DataTableSpec getTableSpec(boolean want_xml) { DataColumnSpec[] cols = new DataColumnSpec[NUM_COLUMNS]; DataType dt = ListCell.getCollectionType(StringCell.TYPE); cols[0] = new DataColumnSpecCreator("UniRef Member Count", IntCell.TYPE).createSpec(); cols[1] = new DataColumnSpecCreator("UniRef Common Taxon", StringCell.TYPE).createSpec(); cols[2] = new DataColumnSpecCreator("UniRef Member UniProtKB Accessions", dt).createSpec(); cols[3] = new DataColumnSpecCreator("UniRef Member Proteins", dt).createSpec(); cols[4] = new DataColumnSpecCreator("UniRef Source Organisms", dt).createSpec(); cols[5] = new DataColumnSpecCreator("UniRef XML Output", XMLCell.TYPE).createSpec(); return new DataTableSpec(cols); } @Override protected DataCell[] grok_entry(String xml) throws Exception { DataCell[] cells = new DataCell[NUM_COLUMNS]; XQEngine eng = new XQEngine(); eng.setXMLReader(XMLReaderFactory.createXMLReader()); //Logger.getAnonymousLogger().info("XML is: "+xml); eng.setExplicitDocument(xml); ResultList rl = eng.setQuery("//property[@type='member count']"); if (rl.getNumTotalItems() == 1) { Integer mem_cnt = new Integer(UniProtHit.extract_attribute(rl.emitXml(), "value")); cells[0] = new IntCell(mem_cnt.intValue()); } else { cells[0] = DataType.getMissingCell(); } rl = eng.setQuery("//property[@type='common taxon']"); if (rl.getNumTotalItems() == 1) { cells[1] = safe_string(UniProtHit.extract_attribute(rl.emitXml(), "value")); } else { cells[1] = DataType.getMissingCell(); } List<String> accsns = add_members(eng, "UniProtKB accession"); cells[2] = list2listcell(accsns); List<String> proteins = add_members(eng, "protein name"); cells[3] = list2listcell(proteins); List<String> organisms= add_members(eng, "source organism"); cells[4] = list2listcell(organisms); cells[5] = new XMLCell(xml); return cells; } protected List<String> add_members(XQEngine eng, String attName) throws Exception { ResultList rl = eng.setQuery("/UniRef/entry/member/dbReference/property[@type='"+attName+"']"); String xml = rl.emitXml(); //Logger.getAnonymousLogger().info(xml); String[] members = xml.split("/>"); ArrayList<String> ret = new ArrayList<String>(); for (String member : members) { if (member.trim().length() > 0) { ret.add(UniProtHit.extract_attribute(member, "value")); } } // representative member data MUST always be first in results rl = eng.setQuery("/UniRef/entry/representativeMember/dbReference/property[@type='"+attName+"']"); ret.add(0, UniProtHit.extract_attribute(rl.emitXml(), "value")); return ret; } }